# SETUP  

# Uncomment the following lines to install packages if they are not already installed
# install.packages("tidyverse")
# install.packages("readxl")
# install.packages("ggplot2")
# install.packages("car")
# install.packages("emmeans")

library(tidyverse)  # data manipulation + visualization
library(readxl)     # reading Excel files if needed
library(ggplot2)    # plotting
library(car)        # ANOVA and type III sums of squares
library(dplyr)      # explicit data manipulation
library(emmeans)    # estimated marginal means for contrasts

options(stringsAsFactors = FALSE)  # avoid automatic factor conversion
options(contrasts = c("contr.sum", "contr.poly"))  # sum-to-zero contrasts

# navigating to user's downloads
# if the zip file is not in your downloads folder, set file.path() manually
downloads_dir <- if (.Platform$OS.type == "windows") {
  file.path(Sys.getenv("USERPROFILE"), "Downloads")
} else {
  file.path(Sys.getenv("HOME"), "Downloads")
}

# Getting path to the ZIP file
zip_file_path <- file.path(downloads_dir, "2011-AsymmetricDiscounting-AppeltEtAl.zip")

# unzip 
extract_dir <- tempdir()
unzip(zip_file_path, exdir = extract_dir)


# Unable to recreate processing the raw data to get n=617 rows and so on as some of the notes have been lost over time.
# 279 participants
study2 <- read.csv(file.path(extract_dir,"2011-AsymmetricDiscounting-AppeltEtAl/Asym_Disc_2011_Study2/Asym_Disc_Study2_CLEANPIIremoved.csv"))


View(study2) 
numRows <- nrow(study2) 
print(numRows)  


#DEMOGRAPHICS
PercentFemale <- (filter(study2, Gender == "Female") %>% nrow() / numRows * 100) %>% round(0) 
print(PercentFemale)  

colnames(study2)[colnames(study2) == 'Whatyearwereyouborn'] <- 'BirthYear'

#fix error where one participant swapped year of birth and age
study2$AGE[study2$AGE > 1000] 
study2$BirthYear[study2$AGE > 1000] 
study2$serial[study2$AGE > 1000]
tempYear <- study2$AGE[study2$AGE > 1000] 
study2$AGE[study2$AGE > 1000] <- study2$BirthYear[study2$AGE > 1000] 
study2$BirthYear[study2$serial == 54545]  <- tempYear

mean(study2$AGE)
sd(study2$AGE)

colnames(study2)[colnames(study2) == 'Whatisyourhighestlevelofeducation'] <- 'LevelofEducation'
PercentAtLeastTwoYearDegree <- (study2 %>% filter(LevelofEducation != "No degree", LevelofEducation != "High school diploma") %>% nrow() / numRows * 100) %>% round(0) 
print(PercentAtLeastTwoYearDegree)  

PercentMarried <- (study2 %>% filter(MaritalStatus == "Married") %>% nrow() / numRows * 100) %>% round(0) 
print(PercentMarried)  

PercentHaveChildren <- (study2 %>% filter(KIDS > 0) %>% nrow() / numRows * 100) %>% round(0) 
print(PercentHaveChildren)  


unique(study2$Householdincome)

#median income calculated here doesn't match paper, possible median was calculated differently

# fixing error in spacing
study2$Householdincome[study2$Householdincome ==  "$50,000- $99,999"] <- "$50,000 - $99,999"

# Define the order
income_levels <- c("$10,000 - $19,999",
                   "$20,000 - $34,999",
                   "$35,000 - $49,999",
                   "$50,000 - $99,999",
                   "$100,000 - $199,999")

# Convert to ordered factor
study2$income_factor <- factor(study2$Householdincome, 
                             levels = income_levels, 
                             ordered = TRUE)

# Convert to rank-ordered numbers
income_numeric <- as.numeric(study2$income_factor)
income_numeric
income_levels[median(income_numeric)]


# RESULTS   

#fix error where one row doesn't have order
study2$order <- trimws(study2$order)
study2$order[study2$order == ""] <- "natural"
study2$order <- factor(study2$order, levels = c("natural", "unnatural"))
study2 %>% count(order)


nrow(filter(study2, order == "natural", titr_condition == "gd")) #33
nrow(filter(study2, order == "natural", titr_condition == "ga")) #33
nrow(filter(study2, order == "natural", titr_condition == "ld")) #41
nrow(filter(study2, order == "natural", titr_condition == "la")) #34

nrow(filter(study2, order == "unnatural", titr_condition == "gd")) #34
nrow(filter(study2, order == "unnatural", titr_condition == "ga")) #37
nrow(filter(study2, order == "unnatural", titr_condition == "ld")) #33
nrow(filter(study2, order == "unnatural", titr_condition == "la")) #34


#Impute Switch Points for Those Who Did Not Swap (Switch type is all variable or all fixed)

# 1.Gain Delayed 
#all fixed - always today (WTA $50 today rather than $90 in future)
study2$sig_var[which(study2$titr_condition == "gd" & study2$switch_type == "all_fixed")] <- 95 
nrow(filter(study2, sig_var == 95)) #0
#all var - always future (WTA $40 in future over $50 today)
study2$sig_var[which(study2$titr_condition == "gd" & study2$switch_type == "all_var")] <- 40
nrow(filter(study2, sig_var == 40)) #4

#2. Gain Accelerated
#all fixed - always future (WTA $75 in future over $85 today)
study2$sig_var[which(study2$titr_condition == "ga" & study2$switch_type == "all_fixed")] <- 90
nrow(filter(study2, sig_var == 90)) #4
#all var - always today (WTA $35 in future over $75 in fuutre)
study2$sig_var[which(study2$titr_condition == "ga" & study2$switch_type == "all_var")] <- 35
nrow(filter(study2, sig_var == 35)) #0

#3. Loss Delay
#all fixed - always today (WTP $50 today over $40 in future)
study2$sig_var[which(study2$titr_condition == "ld" & study2$switch_type == "all_fixed")] <- 35
nrow(filter(study2, sig_var == 35)) #13
#all var - always future (WTP $90 in future over $50 today)
study2$sig_var[which(study2$titr_condition == "ld" & study2$switch_type == "all_var")] <- 90
nrow(filter(study2, sig_var == 90)) #6

#4. Loss Accelerated
#all fixed - always future (WTP $75 in future over $35 today)
study2$sig_var[which(study2$titr_condition == "la" & study2$switch_type == "all_fixed")] <- 30
nrow(filter(study2, sig_var == 30)) #1
#all var - always today (WTP $85 today over $75 in future)
study2$sig_var[which(study2$titr_condition == "la" & study2$switch_type == "all_var")] <- 85
nrow(filter(study2, sig_var == 85)) #5

## find indiference point
# the indifference point is the option switched at, averaged with the next available option 
# this is 5 higher or 5 lower depending on whether the condition is gain or loss
gain_conditions <- study2$sign == "gain"
study2$indif_point[gain_conditions] <- (study2$sig_var[gain_conditions] + (study2$sig_var[gain_conditions] - 5)) / 2

loss_conditions <- study2$sign == "loss"
study2$indif_point[loss_conditions] <- (study2$sig_var[loss_conditions] + (study2$sig_var[loss_conditions] + 5)) / 2


#2.2.1 Asymmetric discounting

# Calculate k for each category
study2$k <- 0 

# Delayed
delay_rows <- study2$direction == "delay"
study2$k[delay_rows] <- (study2$indif_point[delay_rows] - 50) / (50 * 1/4) 
round(mean(study2$k[delay_rows]), 2) #0.51

# Accelerated
accelerate_rows <- study2$direction == "accel"
study2$k[accelerate_rows] <- (75 - study2$indif_point[accelerate_rows]) / (study2$indif_point[accelerate_rows] * 1/4) 
round(mean(study2$k[accelerate_rows]), 2) #0.63


# Display num observations, mean k, SD k, and range for each category (delay/acc) and (nat/unnat order)
study2 %>%
  mutate(titr_condition = factor(titr_condition, levels = c("gd", "ga", "ld", "la"))) %>%
  group_by(order, titr_condition) %>%
  summarise(
    n = n(),
    mean_k = mean(k),
    sd_k = sd(k),
    range_k = max(k) - min(k),
    .groups = "drop"
  ) %>%
  arrange(order, titr_condition)


#ANOVA for observations in the natural order
natural_data <- study2 %>%  filter(order == "natural") %>%
  mutate(
    sign = factor(sign, levels = c("gain", "loss")),
    direction = factor(direction, levels = c("delay", "accel")))

anova_natural <- aov(k ~ sign * direction, data = natural_data)
anova_nattable <- Anova(anova_natural, type = "III")
anova_nattable

# Partial eta-squared for natural order
SS_effect_natural <- anova_nattable$`Sum Sq`[2:4]  # sign, direction, sign:direction
SS_error_natural  <- anova_nattable$`Sum Sq`[5]    # Residuals
partial_eta2_natural <- SS_effect_natural / (SS_effect_natural + SS_error_natural)
names(partial_eta2_natural) <- rownames(anova_nattable)[2:4]
partial_eta2_natural

# Planned contrast tests for natural order
em <- emmeans(anova_natural, ~ direction | sign)
contrast(em, "pairwise", simple = "each")

# Cohen's d (using unweighted pooled SD, as in paper)
cohens_d <- function(x, g) {
  grp1 <- x[g == levels(g)[1]]
  grp2 <- x[g == levels(g)[2]]
  mean_diff <- mean(grp1) - mean(grp2)
  pooled_sd <- sqrt((var(grp1) + var(grp2)) / 2)  # unweighted
  d <- mean_diff / pooled_sd
  return(d)
}

# Gains (natural order)
gain_data <- natural_data %>% filter(sign == "gain")
gain_data$direction <- factor(gain_data$direction, levels = c("delay", "accel"))
d_gain <- cohens_d(gain_data$k, gain_data$direction)
d_gain

# Losses (natural order)
loss_data <- natural_data %>% filter(sign == "loss")
loss_data$direction <- factor(loss_data$direction, levels = c("accel", "delay"))
d_loss <- cohens_d(loss_data$k, loss_data$direction)
d_loss


#ANOVA for observations in the unnatural order
unnatural_data <- study2 %>%  filter(order == "unnatural") %>%
  mutate(
    sign = factor(sign, levels = c("gain", "loss")),
    direction = factor(direction, levels = c("delay", "accel")))

anova_unnatural <- aov(k ~ sign * direction, data = unnatural_data)
anova_unnattable <- Anova(anova_unnatural, type = "III")
anova_unnattable

# Partial eta-squared for unnatural order
SS_effect_unnatural <- anova_unnattable$`Sum Sq`[2:4]  # sign, direction, sign:direction
SS_error_unnatural  <- anova_unnattable$`Sum Sq`[5]    # Residuals
partial_eta2_unnatural <- SS_effect_unnatural / (SS_effect_unnatural + SS_error_unnatural)
names(partial_eta2_unnatural) <- rownames(anova_unnattable)[2:4]
partial_eta2_unnatural

#2.2.2 Prominence of now thoughts
# need to determine number of now thoughts and number of later thoughts for each participant 

#calculate SMRD
aspects <- read.csv(file.path(extract_dir,"2011-AsymmetricDiscounting-AppeltEtAl/Asym_Disc_2011_Study2/Raw Data Hashed/5 all aspects.csv"))

#only look at aspects for the 279 participants analyzed in the main file
aspects_filtered <- aspects %>%
  filter(serial %in% study2$serial)

nrow(aspects)
nrow(aspects_filtered) #2544 aspects across the 279 serials

unique(aspects_filtered$category_text)
# [1] "something <b>bad</b> about paying the fine <b>later</b>"            
# [2] "paying the fine <b>now</b>"                                             
# [3] "something <b>good</b> about paying the fine <b>now</b>"                 
# [4] "something <b>good</b> about paying the fine <b>later</b>"               
# [5] "paying the fine <b>later</b>"                                           
# [6] "something <b>good</b> about receiving the gift certificate <b>later</b>"
# [7] "receiving the gift certificate <b>later</b>"                            
# [8] "something <b>good</b> about receiving the gift certificate <b>now</b>"  
# [9] "receiving the gift certificate <b>now</b>"                              
# [10] "something <b>bad</b> about receiving the gift certificate <b>now</b>"   
# [11] "none of the above"                                                      
# [12] "neither"                                                                
# [13] "something <b>bad</b> about paying the fine <b>now</b>"                  
# [14] "something <b>bad</b> about receiving the gift certificate <b>later</b>" 


#Note: the paper has n=275 participants for aspect listings, with 4 participants providing irrelevant thoughts filtered out.
# Currently, it is not clear which 4 participants were filtered out, so the analyses below continue with n=279.
# As a result, the below calculations have different values than those in the paper. Overall, the patterns of means are very similar, the significance levels for the key hypotheses are unchanged, and the effect sizes are generally reduced a bit. 

aspects_filtered %>%
  group_by(serial) %>%
  summarise(
    n_total = n(),
    n_invalid = sum(category_text %in% c("none of the above", "neither", "")),
    n_valid = n_total - n_invalid
  ) %>%
  arrange(n_valid)


thoughts_favor <- aspects_filtered %>%
  filter(category_text %in% c(
    "something <b>bad</b> about paying the fine <b>later</b>",       
    "something <b>good</b> about paying the fine <b>now</b>",
    "something <b>good</b> about receiving the gift certificate <b>now</b>",
    "something <b>bad</b> about receiving the gift certificate <b>later</b>",
    "something <b>good</b> about paying the fine <b>later</b>",  
    "something <b>good</b> about receiving the gift certificate <b>later</b>",
    "something <b>bad</b> about receiving the gift certificate <b>now</b>",
    "something <b>bad</b> about paying the fine <b>now</b>"     
  )) %>%
  mutate(favor_type = case_when(
    category_text %in% c(
      "something <b>bad</b> about paying the fine <b>later</b>",       
      "something <b>good</b> about paying the fine <b>now</b>",
      "something <b>good</b> about receiving the gift certificate <b>now</b>",
      "something <b>bad</b> about receiving the gift certificate <b>later</b>" 
    ) ~ "now",
    category_text %in% c(
      "something <b>good</b> about paying the fine <b>later</b>",  
      "something <b>good</b> about receiving the gift certificate <b>later</b>",
      "something <b>bad</b> about receiving the gift certificate <b>now</b>",
      "something <b>bad</b> about paying the fine <b>now</b>"     
    ) ~ "later"
  ))


thoughts_ranked <- thoughts_favor %>%
  group_by(serial) %>%
  arrange(aspect_id, .by_group = TRUE) %>%   # ensure sorted by aspect_id inside each serial
  mutate(rank_order = min_rank(aspect_id)) %>%     # lowest aspect_id = rank 1
  ungroup()

total_aspects_per_serial <- aspects_filtered %>%
  group_by(serial) %>%
  summarise(total_aspects = n_distinct(aspect_id))


favor_medians <- thoughts_ranked %>%
  group_by(serial) %>%
  summarise(
    n_now = sum(favor_type == "now"),
    n_later = sum(favor_type == "later"),
    mr_now = ifelse(n_now > 0, median(rank_order[favor_type == "now"]), NA),
    mr_later = ifelse(n_later > 0, median(rank_order[favor_type == "later"]), NA),
    .groups = "drop"
  ) %>%
  mutate(
    # Total number of favoring thoughts
    asp_num_favor = n_now + n_later,
    
    # Assign extreme SMRD if participant has only one type
    SMRD = case_when(
      n_now == 0 ~ -1,      # only later thoughts
      n_later == 0 ~ 1,     # only now thoughts
      TRUE ~ 2 * (mr_later - mr_now) / asp_num_favor
    )
  )
data2 <- left_join(
  study2, 
  dplyr::select(favor_medians, serial, SMRD, n_now, n_later, asp_num_favor, mr_now, mr_later),
  by = "serial"
)

nrow(data2)

data2 <- data2 %>%
  filter(!is.na(SMRD))

smrd_by_condition <- data2 %>%
  group_by(order, titr_condition) %>% 
  summarise(
    mean_SMDR = mean(SMRD, na.rm = TRUE),
    sd_SMDR = sd(SMRD, na.rm = TRUE),
    n = n()
  )



smrd_by_condition

data2$rel_now <- data2$n_now - data2$n_later


data2$sign <- factor(data2$sign, levels = c("gain", "loss"))
data2$direction <- factor(data2$direction, levels = c("delay", "accel"))
data2$order <- factor(data2$order, levels = c("natural", "unnatural"))

# Compute prominence of now thoughts as average of SMRD_z and rel_now_z
data2$rel_now_z <- (data2$rel_now - mean(data2$rel_now, na.rm = TRUE)) / sd(data2$rel_now, na.rm = TRUE)
data2$SMRD_z <- (data2$SMRD - mean(data2$SMRD, na.rm = TRUE)) / 
  sd(data2$SMRD, na.rm = TRUE)
data2$prominence_now <- rowMeans(data2[, c("SMRD_z", "rel_now_z")], na.rm = TRUE)
summary(data2$prominence_now)


natural_data <- data2 %>%
  filter(order == "natural") %>%
  mutate(
    sign = factor(sign, levels = c("gain", "loss")),
    direction = factor(direction, levels = c("delay", "accel"))
  )

unnatural_data <- data2 %>%
  filter(order == "unnatural") %>%
  mutate(
    sign = factor(sign, levels = c("gain", "loss")),
    direction = factor(direction, levels = c("delay", "accel"))
  )

natural_data$sign <- factor(natural_data$sign, levels = c("gain", "loss"))
natural_data$direction <- factor(natural_data$direction, levels = c("delay", "accel"))


natural_data %>% group_by(sign) %>% summarise(mean = mean(prominence_now), sd = sd(prominence_now))
natural_data %>% group_by(direction) %>% summarise(mean = mean(prominence_now), sd = sd(prominence_now))


unnatural_data %>% group_by(sign) %>% summarise(mean = mean(prominence_now), sd = sd(prominence_now))
unnatural_data %>% group_by(direction) %>% summarise(mean = mean(prominence_now), sd = sd(prominence_now))


prominence_summary <- data2 %>%
  group_by(order, sign, direction) %>%
  summarise(
    mean_prominence_now = mean(prominence_now, na.rm = TRUE),
    sd_prominence_now   = sd(prominence_now, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )

prominence_summary


# ANOVA (natural order)
anova_natural <- aov(prominence_now ~ sign * direction, data = natural_data)
summary(anova_natural)

# Type III sum of squares for partial eta-squared
anova_nattable <- Anova(anova_natural, type = "III")
anova_nattable

SS_effect_natural <- anova_nattable$`Sum Sq`[2:4]  # sign, direction, sign:direction
SS_error_natural  <- anova_nattable$`Sum Sq`[5]    # residuals
partial_eta2_natural <- SS_effect_natural / (SS_effect_natural + SS_error_natural)
names(partial_eta2_natural) <- rownames(anova_nattable)[2:4]
partial_eta2_natural

# ANOVA (unnatural order)
anova_unnatural <- aov(prominence_now ~ sign * direction, data = unnatural_data)
summary(anova_unnatural)

anova_unnattable <- Anova(anova_unnatural, type = "III")
anova_unnattable

SS_effect_unnatural <- anova_unnattable$`Sum Sq`[2:4]
SS_error_unnatural  <- anova_unnattable$`Sum Sq`[5]
partial_eta2_unnatural <- SS_effect_unnatural / (SS_effect_unnatural + SS_error_unnatural)
names(partial_eta2_unnatural) <- rownames(anova_unnattable)[2:4]
partial_eta2_unnatural


